# import the required libraries
import pandas as pd
import numpy as np
import plotly.io as pio
import plotly.express as px
import seaborn as sns
pio.renderers.default = "notebook"
# Load the preprocessed dataset from its pickle file and preview the first rows.
# NOTE(review): unpickling can execute arbitrary code - only load files that
# come from a trusted source.
file_input = "df_after_preprocessing.pkl"
df_cf = pd.read_pickle(filepath_or_buffer=file_input)
df_cf.head(n=5)
| funded_amount | loan_amount | activity | sector | use | country_code | country | currency | term_in_months | lender_count | repayment_interval | borrower_male_count | borrower_female_count | funding_pro_lender | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 300.0 | 300.0 | Fruits & Vegetables | Food | To buy seasonal, fresh fruits to sell. | PK | Pakistan | PKR | 12 | 12 | irregular | 0 | 1 | 25.00000 |
| 1 | 575.0 | 575.0 | Rickshaw | Transportation | to repair and maintain the auto rickshaw used ... | PK | Pakistan | PKR | 11 | 14 | irregular | 0 | 2 | 41.07143 |
| 2 | 150.0 | 150.0 | Transportation | Transportation | To repair their old cycle-van and buy another ... | IN | India | INR | 43 | 6 | bullet | 0 | 1 | 25.00000 |
| 3 | 200.0 | 200.0 | Embroidery | Arts | to purchase an embroidery machine and a variet... | PK | Pakistan | PKR | 11 | 8 | irregular | 0 | 1 | 25.00000 |
| 4 | 400.0 | 400.0 | Milk Sales | Food | to purchase one buffalo. | PK | Pakistan | PKR | 14 | 16 | monthly | 0 | 1 | 25.00000 |
# Keep only the two amount columns and flag, per loan, whether the funded
# amount is at least the requested loan amount.
df_group1 = df_cf[["funded_amount", "loan_amount"]].copy()
df_group1["amount_reached"] = df_group1["funded_amount"].ge(df_group1["loan_amount"])
df_group1
| funded_amount | loan_amount | amount_reached | |
|---|---|---|---|
| 0 | 300.0 | 300.0 | True |
| 1 | 575.0 | 575.0 | True |
| 2 | 150.0 | 150.0 | True |
| 3 | 200.0 | 200.0 | True |
| 4 | 400.0 | 400.0 | True |
| ... | ... | ... | ... |
| 646828 | 0.0 | 25.0 | False |
| 646829 | 0.0 | 25.0 | False |
| 646830 | 0.0 | 125.0 | False |
| 646831 | 0.0 | 875.0 | False |
| 646832 | 0.0 | 250.0 | False |
642934 rows × 3 columns
# Count the loans in each "amount_reached" group.
# NOTE: despite its name, the "ar_rate" column holds row counts, not a rate.
df_group11 = (
    df_group1.groupby("amount_reached")
    .size()
    .to_frame(name="ar_rate")
)
df_group11
| ar_rate | |
|---|---|
| amount_reached | |
| False | 47395 |
| True | 595539 |
# Pie chart: how many loans reached their requested amount.
#
# The boolean group key is translated to "No"/"Yes" *before* plotting, so the
# slice labels and colours are bound to the value itself. Overwriting the
# trace labels positionally after the figure is built (and keying the colour
# map by 1/0) only works while both groups are present and in False/True order.
pie_df = df_group11.reset_index()
pie_df["amount_reached"] = pie_df["amount_reached"].map({False: "No", True: "Yes"})

my_pie1 = px.pie(
    pie_df,
    values="ar_rate",
    names="amount_reached",
    title='Are projects reached the target?',
    color="amount_reached",
    color_discrete_map={"Yes": "green", "No": "red"},
    width=600,
    height=400,
)
my_pie1.update_layout(
    legend_traceorder="reversed",
    legend_title_text='Are people interested in Projects?',
)
my_pie1.update_traces(
    hoverinfo='percent+label',
    textinfo='value+percent',
    insidetextorientation='horizontal',
    textposition='inside',
)
my_pie1.show()
About 92% of the projects reached their target loan amount. This shows that our platform is successful and attracts people's attention. Since this is a funding platform, even when the target is not reached, the amount collected is still paid to the borrowers. We can therefore say the platform is even more successful than this figure suggests, considering that some of the remaining projects still reached around 90% of their target.
# Per country: sum the male and female borrower counts, add their total and
# order the countries by that total (largest first).
# NOTE(review): the "nof_project_*" columns are sums of borrower counts, which
# is not necessarily the number of projects - confirm the intended meaning.
df_group21 = (
    df_cf.groupby("country")
    .agg(
        nof_project_male=("borrower_male_count", "sum"),
        nof_project_female=("borrower_female_count", "sum"),
    )
    .reset_index()
)
df_group21["total_project"] = (
    df_group21["nof_project_female"] + df_group21["nof_project_male"]
)
df_group21 = df_group21.sort_values(by="total_project", ascending=False)
df_group21.head(5)
| country | nof_project_male | nof_project_female | total_project | |
|---|---|---|---|---|
| 34 | Kenya | 54662.0 | 106152.0 | 160814.0 |
| 59 | Philippines | 8358.0 | 145890.0 | 154248.0 |
| 57 | Paraguay | 2523.0 | 116608.0 | 119131.0 |
| 11 | Cambodia | 12752.0 | 55523.0 | 68275.0 |
| 58 | Peru | 17356.0 | 45760.0 | 63116.0 |
# Bar chart of the male and female sums for the first 20 rows of df_group21.
wide_df = df_group21.iloc[:20]
fig = px.bar(
    data_frame=wide_df,
    x="country",
    y=["nof_project_male", "nof_project_female"],
    title="Projects male versus female initiated",
    labels={"country": "Countries", "value": "Number of Projects"},
)
fig.update_layout(legend={"title": "Gender initiaded projects"})
fig.show()
It seems that our platform is used mostly by women. Even in Kenya, where women have very little power in society, women used this platform more than men. In the Philippines and Paraguay, men hardly appear at all compared with women. So we can focus on women's needs (reaching and using the platform) to make it more effective, and we can also investigate why men are not using it as much as women. It is to be expected that underdeveloped countries such as Kenya, the Philippines and Cambodia use the platform to fund their needs when borrowers have no chance to meet them on their own.
# Per sector: sum the male and female borrower counts, add their total and
# order the sectors by that total (largest first).
# NOTE(review): the "nof_project_*" columns are sums of borrower counts, which
# is not necessarily the number of projects - confirm the intended meaning.
df_group22 = (
    df_cf.groupby("sector")
    .agg(
        nof_project_male=("borrower_male_count", "sum"),
        nof_project_female=("borrower_female_count", "sum"),
    )
    .reset_index()
)
df_group22["total_project"] = (
    df_group22["nof_project_female"] + df_group22["nof_project_male"]
)
df_group22 = df_group22.sort_values(by="total_project", ascending=False)
df_group22.head(5)
| sector | nof_project_male | nof_project_female | total_project | |
|---|---|---|---|---|
| 0 | Agriculture | 110612.0 | 258277.0 | 368889.0 |
| 6 | Food | 39538.0 | 284637.0 | 324175.0 |
| 11 | Retail | 26747.0 | 199056.0 | 225803.0 |
| 2 | Clothing | 8174.0 | 80106.0 | 88280.0 |
| 12 | Services | 13102.0 | 62573.0 | 75675.0 |
# Bar chart of the male and female sums for the first 20 rows of df_group22.
wide_df = df_group22.iloc[:20]
fig = px.bar(
    data_frame=wide_df,
    x="sector",
    y=["nof_project_male", "nof_project_female"],
    title="Projects male versus female initiated",
    labels={"sector": "Sectors", "value": "Number of Projects"},
)
fig.update_layout(legend={"title": "Gender initiaded projects"})
fig.show()
Agriculture and Food are the main sectors on the funding platform. Men are somewhat more strongly represented in the Agriculture sector than in the other sectors. However, even in this sector women did a great job.
# Count rows per (sector, activity, country) by summing a helper column of
# ones, then keep only the combinations that have at least one row.
df_group3 = df_cf[["sector", "activity", "country"]].assign(cnt=1)
df_group31 = (
    df_group3.groupby(["sector", "activity", "country"])
    .agg(nof_project=("cnt", "sum"))
    .reset_index()
)
df_group31 = df_group31[df_group31["nof_project"] > 0]
df_group31
| sector | activity | country | nof_project | |
|---|---|---|---|---|
| 88 | Agriculture | Agriculture | Albania | 174 |
| 89 | Agriculture | Agriculture | Armenia | 600 |
| 90 | Agriculture | Agriculture | Azerbaijan | 70 |
| 91 | Agriculture | Agriculture | Belize | 68 |
| 92 | Agriculture | Agriculture | Benin | 2 |
| ... | ... | ... | ... | ... |
| 212706 | Wholesale | Wholesale | Uganda | 42 |
| 212707 | Wholesale | Wholesale | Ukraine | 1 |
| 212708 | Wholesale | Wholesale | United States | 10 |
| 212710 | Wholesale | Wholesale | Vietnam | 2 |
| 212714 | Wholesale | Wholesale | Zimbabwe | 8 |
6240 rows × 4 columns
# Sunburst of sector -> activity, coloured by nof_project.
# NOTE(review): no `values=` is passed, so wedge sizes are presumably driven by
# row counts rather than by nof_project - confirm this is intended.
df = df_group31
fig = px.sunburst(
    data_frame=df,
    path=["sector", "activity"],
    color="nof_project",
    width=1000,
    height=800,
    title="Sectors and Activities",
)
fig.show()
C:\Users\alfa\anaconda3\lib\site-packages\plotly\express\_core.py:1637: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. C:\Users\alfa\anaconda3\lib\site-packages\plotly\express\_core.py:1637: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
These 5 sectors always get attention on funding platforms: Food, Agriculture, Retail, Services and Clothing. Food requests are mostly for food production/sales, and Agriculture requests are mostly for farming.
# Treemap of sector -> country under a single constant root node, coloured by
# nof_project. Uses the module-level `df` assigned in an earlier cell.
fig = px.treemap(
    data_frame=df,
    path=[px.Constant("country"), "sector", "country"],
    color="nof_project",
    title="Sectors and Countries",
)
fig.show()
C:\Users\alfa\anaconda3\lib\site-packages\plotly\express\_core.py:1637: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. C:\Users\alfa\anaconda3\lib\site-packages\plotly\express\_core.py:1637: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. C:\Users\alfa\anaconda3\lib\site-packages\plotly\express\_core.py:1637: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
The Retail sector is used mostly by the Philippines, Services mostly by Kenya, and Food by Colombia.
# World map with one marker per row of df_group31: located by country name,
# coloured by sector and sized by nof_project.
df = df_group31
fig = px.scatter_geo(
    data_frame=df,
    locations="country",
    locationmode="country names",
    color="sector",
    size="nof_project",
    hover_name="country",
    projection="natural earth",
)
fig.show()
When we look at the countries using the platform on the world map, we see the Philippines, Kenya, Cambodia, and some other African, Asian and American countries.
# Average funding per lender for every (sector, country) pair.
df_group4 = df_cf.loc[:, ["sector", "country", "funding_pro_lender"]]
# Keep only rows with a positive funding-per-lender value. The result has to
# be assigned back: `.loc[...]` returns a new frame, so without the assignment
# the filter has no effect on the averages computed below.
df_group4 = df_group4.loc[df_group4["funding_pro_lender"] > 0, :]
df_group4 = (
    df_group4.groupby(["sector", "country"])
    .agg(avg_fund_amount=("funding_pro_lender", "mean"))
    .reset_index()
)
# Drop (sector, country) pairs for which no average could be computed.
df_group4 = df_group4.loc[df_group4["avg_fund_amount"].notna(), :]
df_group4
| sector | country | avg_fund_amount | |
|---|---|---|---|
| 1 | Agriculture | Albania | 31.394106 |
| 2 | Agriculture | Armenia | 48.468452 |
| 3 | Agriculture | Azerbaijan | 39.485279 |
| 4 | Agriculture | Belize | 27.167610 |
| 5 | Agriculture | Benin | 35.208199 |
| ... | ... | ... | ... |
| 1298 | Wholesale | United States | 82.172058 |
| 1300 | Wholesale | Vietnam | 35.768745 |
| 1302 | Wholesale | Yemen | 29.761906 |
| 1303 | Wholesale | Zambia | 27.809557 |
| 1304 | Wholesale | Zimbabwe | 33.176941 |
987 rows × 3 columns
# Scatter of the average fund amount per sector, one marker per row of
# df_group4, coloured by country and sized by the average itself.
df = df_group4
fig = px.scatter(
    data_frame=df,
    x="sector",
    y="avg_fund_amount",
    color="country",
    size="avg_fund_amount",
    title="Average amount of funds for Countries and Sectors",
)
fig.update_layout(legend={"title": "Countries"})
# Dashed horizontal reference line at y = 400.
fig.add_hline(y=400, line_width=3, line_dash="dash", line_color="black")
fig.show()
Looking at the average fund amount, it is very interesting that Services in Congo reached 816, followed by Clothing in Paraguay. In general, when we check, Clothing and Services have higher average fund amounts.
# Restrict to loans whose funded amount stayed below the requested amount,
# count them per (sector, activity, country) via a helper column of ones, and
# keep only the combinations that have at least one such loan.
df_group5 = df_cf.loc[
    df_cf["funded_amount"] < df_cf["loan_amount"],
    ["sector", "activity", "country"],
].assign(cnt=1)
df_group5 = (
    df_group5.groupby(["sector", "activity", "country"])
    .agg(nof_project=("cnt", "sum"))
    .reset_index()
)
df_group5 = df_group5[df_group5["nof_project"] > 0]
df_group5
| sector | activity | country | nof_project | |
|---|---|---|---|---|
| 88 | Agriculture | Agriculture | Albania | 10 |
| 89 | Agriculture | Agriculture | Armenia | 134 |
| 90 | Agriculture | Agriculture | Azerbaijan | 13 |
| 94 | Agriculture | Agriculture | Bolivia | 47 |
| 96 | Agriculture | Agriculture | Burkina Faso | 4 |
| ... | ... | ... | ... | ... |
| 204970 | Wholesale | Goods Distribution | Zambia | 3 |
| 212643 | Wholesale | Wholesale | Colombia | 4 |
| 212682 | Wholesale | Wholesale | Pakistan | 1 |
| 212683 | Wholesale | Wholesale | Palestine | 1 |
| 212708 | Wholesale | Wholesale | United States | 5 |
2374 rows × 4 columns
# Scatter of the under-funded loan counts in df_group5: count on the x-axis,
# sector on the y-axis, colour per country and marker symbol per activity.
my_view = px.scatter(
    df_group5,
    x="nof_project",
    y="sector",
    color="country",
    symbol="activity",
    title='Number of Projects unsuccessfull',
)
my_view.update_traces(marker=dict(size=10, line=dict(width=1)))
# Dashed vertical reference line at x = 350.
my_view.add_vline(x=350, line_width=3, line_dash="dash", line_color="black")
my_view.show()
Kenya, with 1831 Farming projects, has the most projects that missed their funding target. Considering the weather conditions, it is normal to have many farming funding requests and, as a result, many unreached targets. Next come El Salvador with 1293 Housing Expenses projects and the Philippines with General Store.